ohibc logo
OHI British Columbia | OHI Science | Citation policy

knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'Figs/',
                      echo = TRUE, message = FALSE, warning = FALSE)

library(raster)
library(data.table)


source('~/github/src/R/common.R')  ###
  ### includes library(tidyverse); library(stringr); dir_M points to ohi directory

dir_git <- '~/github/spp_health_dists'

dir_am_data <- file.path(dir_M, 'git-annex/globalprep/_raw_data/aquamaps/d2017')

### goal specific folders and info
dir_setup <- file.path(dir_git, 'data_setup')
dir_data  <- file.path(dir_git, 'data')
dir_o_anx <- file.path(dir_O, 'git-annex/spp_health_dists')
# ### provenance tracking
# library(provRmd); prov_setup()

### support scripts
source('~/github/src/R/rast_tools.R')
  ### raster plotting and analyzing scripts

1 Summary

Create a map of the distribution of current species health - all species in AquaMaps assessed by IUCN (using AquaMaps version of IUCN ID numbers)

2 Data Sources

3 Methods

3.1 Spatial distribution of current extinction risk

3.1.1 Aggregate mean risk and variance by cell

Here we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or cat_score.

cell_risk_file <- file.path(dir_data, 'risk_by_cell_all.csv')


iucn_to_am_lookup <- read_csv(file.path(dir_setup, 'int', 
                                        'am_iucn_crosslisted_spp.csv')) %>%
  select(am_sid = speciesid, iucn_sid = iucn_id) %>%
  distinct()

spp_current_risk <- read_csv(file.path(dir_data, 'iucn_spp_cat_current.csv')) %>%
  select(iucn_sid, cat, cat_score) %>%
  left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
  as.data.table()

if(!exists('spp_cells')) {
  spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
}
## 
Read 43.7% of 15023644 rows
Read 79.9% of 15023644 rows
Read 15023644 rows and 3 (of 3) columns from 0.312 GB file in 00:00:04
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')

### using data.table join:
spp_cells_risk <- spp_current_risk[spp_cells, on = 'am_sid']

spp_cells_risk_sum <- spp_cells_risk %>%
  filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
  filter(prob >= 0.60) %>%
  group_by(loiczid) %>%
  summarize(n_spp = n(), 
            mean_risk = mean(cat_score))

write_csv(spp_cells_risk_sum, cell_risk_file)

3.1.2 Load raster and substitute mean risks

loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))

### gotta force the mean_risk column to be double; there are lots of zero
### values so will default to thinking that column is integer.
risk_by_cell <- read_csv(cell_risk_file,
                         col_types = 'iiddd')

risk_rast <- subs(loiczid_rast, risk_by_cell, by = 'loiczid', which = 'mean_risk')
nspp_rast <- subs(loiczid_rast, risk_by_cell, by = 'loiczid', which = 'n_spp')

plot(risk_rast)

plot(nspp_rast)

risk_clipped <- risk_rast
values(risk_clipped)[values(nspp_rast) < 5] <- NA

plot(risk_clipped)

3.1.3 histogram of mean risk

Looks skewed; does it make sense to look at log risk? Looks like risk (with zeros excluded) might be distributed closer to log-normal.

mean_risk_hist <- ggplot(risk_by_cell) + 
  ggtheme_plot() +
  geom_histogram(aes(x = mean_risk))
print(mean_risk_hist)

3.2 Spatial distribution of trend in extinction risk

3.2.1 Aggregate mean trend by cell

Again we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or trend_score.

cell_trend_file <- file.path(dir_data, 'spp_trend_by_loiczid.csv')

iucn_to_am_lookup <- read_csv(file.path(dir_data, 'am_iucn_crosslisted_spp.csv')) %>%
  select(am_sid = speciesid, iucn_sid = iucn_id) %>%
  distinct()

spp_trend <- read_csv(file.path(dir_data, 'trend_by_spp.csv')) %>%
  left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
  as.data.table()

if(!exists('spp_cells')) {
  spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
}
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')

### using data.table join:
spp_cells_trend <- spp_trend[spp_cells, on = 'am_sid']

spp_cells_trend_sum <- spp_cells_trend %>%
  filter(!is.na(trend_score) & !is.na(iucn_sid)) %>%
  filter(prob >= 0.60) %>%
  group_by(loiczid) %>%
  summarize(n_spp = n(), mean_trend = mean(trend_score))

write_csv(spp_cells_trend_sum, cell_trend_file)

3.3 Calculate means by species range

3.3.1 Aggregate mean risk and variance by cell

Here we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or cat_score.

cell_risk_by_range_file <- c(file.path(dir_data, 'risk_by_cell_by_range.csv'))

iucn_to_am_lookup <- read_csv(file.path(dir_data, 'am_iucn_crosslisted_spp.csv')) %>%
  select(am_sid = speciesid, iucn_sid = iucn_id) %>%
  distinct()

spp_ranges <- read_csv(file.path(dir_data, 'am_spp_range_summary.csv')) %>%
  arrange(area_km2) %>%
  mutate(cum_area = cumsum(area_km2),
         range_gp = case_when(cum_area <= max(cum_area) * .33 ~ 'small',
                              cum_area <= max(cum_area) * .67 ~ 'medium',
                              TRUE                       ~ 'large')) %>%
  select(am_sid, iucn_sid, area_km2, range_gp)
# summary(spp_ranges %>% filter(range_gp == 'small') %>% .$area_km2)
#      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
#     2522     400481   1180425   2192586   3739745   7681908
# summary(spp_ranges %>% filter(range_gp == 'medium') %>% .$area_km2)
#      Min.   1st Qu.    Median      Mean   3rd Qu.       Max. 
#   7690359   8286343   9184440  15440048  12563147  103892977 
# summary(spp_ranges %>% filter(range_gp == 'large') %>% .$area_km2)
#      Min.   1st Qu.    Median      Mean   3rd Qu.       Max. 
# 105087230 136623452 159164037 167382766 196088390  245559322

spp_current_risk <- read_csv(file.path(dir_data, 'iucn_spp_cat_current.csv')) %>%
  select(iucn_sid, cat, cat_score) %>%
  left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
  left_join(spp_ranges) %>%
  as.data.table()

if(!exists('spp_cells')) {
  spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
}
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')

### using data.table join:
spp_cells_risk <- spp_current_risk[spp_cells, on = 'am_sid']

spp_cells_risk_sum <- spp_cells_risk %>%
  filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
  filter(prob >= 0.60) %>%
  group_by(loiczid, range_gp) %>%
  summarize(n_spp = n(), 
            mean_risk = mean(cat_score))

write_csv(spp_cells_risk_sum, cell_risk_by_range_file)

3.3.2 Load raster and substitute mean risks

loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))

### gotta force the mean_risk column to be double; there are lots of zero
### values so will default to thinking that column is integer.
risk_by_cell_by_range <- read_csv(file.path(dir_data, 'risk_by_cell_by_range.csv'),
                                  col_types = 'dcdd')

for(range_size in c('small', 'medium', 'large')) {
  ### range_size <- 'medium'
  tmp <- risk_by_cell_by_range %>%
    filter(range_gp == range_size) %>%
    filter(n_spp >= 5)
  
  risk_rast <- subs(loiczid_rast, tmp, 
                    by = 'loiczid', which = 'mean_risk')
  nspp_rast <- subs(loiczid_rast, tmp, 
                    by = 'loiczid', which = 'n_spp')
  
  plot(risk_rast, main = paste0('Mean risk for spp w/range ', range_size, ' (n >= 5)'))
  plot(nspp_rast, main = paste0('Spp count for spp w/range ', range_size, ' (n >= 5)'))

}

3.4 Calculate means by taxa

3.4.1 Aggregate mean risk and variance by cell

Here we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or cat_score.

cell_risk_by_taxa_file <- c(file.path(dir_data, 'risk_by_cell_by_taxa.csv'))

iucn_to_am_lookup <- read_csv(file.path(dir_data, 'am_iucn_crosslisted_spp.csv')) %>%
  select(am_sid = speciesid, iucn_sid = iucn_id) %>%
  distinct()

spp_taxa <- read_csv(file.path(dir_data, 'am_spp_taxa.csv')) %>%
  select(am_sid, iucn_sid, taxa) %>%
  distinct()

spp_current_risk <- read_csv(file.path(dir_data, 'iucn_spp_cat_current.csv')) %>%
  select(iucn_sid, cat, cat_score) %>%
  left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
  left_join(spp_taxa) %>%
  as.data.table()

if(!exists('spp_cells')) {
  spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
}
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')

### using data.table join:
spp_cells_risk <- spp_current_risk[spp_cells, on = 'am_sid']

spp_cells_risk_sum <- spp_cells_risk %>%
  filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
  filter(prob >= 0.60) %>%
  group_by(loiczid, taxa) %>%
  summarize(n_spp = n(), 
            mean_risk = mean(cat_score))

write_csv(spp_cells_risk_sum, cell_risk_by_taxa_file)

3.4.2 Load raster and substitute mean risks

loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))

### gotta force the mean_risk column to be double; there are lots of zero
### values so will default to thinking that column is integer.
risk_by_cell_by_taxa <- read_csv(file.path(dir_data, 'risk_by_cell_by_taxa.csv'),
                                 col_types = 'dcdd')

taxa_list <- risk_by_cell_by_taxa$taxa %>% unique()

for(taxa_gp in taxa_list) {
  ### taxa_gp <- 'mammal'
  tmp <- risk_by_cell_by_taxa %>%
    filter(taxa == taxa_gp) %>%
    filter(n_spp >= 0)
  
  risk_rast <- subs(loiczid_rast, tmp, 
                    by = 'loiczid', which = 'mean_risk')
  nspp_rast <- subs(loiczid_rast, tmp, 
                    by = 'loiczid', which = 'n_spp')
  
  plot(risk_rast, main = paste0('Mean risk for spp in ', taxa_gp, ' (n >= 0)'))
  plot(nspp_rast, main = paste0('Spp count for spp in ', taxa_gp, ' (n >= 0)'))

}


# prov_wrapup(commit_outputs = FALSE)